package com.galeno.算子

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates Spark RDD transformation operators, focusing on sortByKey:
 * global sort by key, and global sort by the whole (key, value) pair.
 *
 * @author galeno
 * @date 2021/8/26 16:04
 */
object 转换算子1 {

  /**
   * Demonstrates two sortByKey use cases on a pair RDD:
   *   1. global ordering by key;
   *   2. global ordering by the full (key, value) pair, achieved by mapping
   *      each record to ((k, v), null) so the tuple itself becomes the key.
   *
   * Results are written as text files under `dataout/`.
   */
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName(this.getClass.getName).setMaster("local")
    )
    try {
      val rdd_stbk = sc.makeRDD(
        Seq(("a", 1), ("b", 2), ("a", 10), ("b", 20), ("c", 40), ("d", 4), ("d", 18), ("d", 30), ("e", 10)),
        numSlices = 3
      )

      // Globally ordered by key across the 2 output partitions: sortByKey uses a
      // RangePartitioner, so part-files are themselves in key order.
      val rdd_stbk_res = rdd_stbk.sortByKey(ascending = true, numPartitions = 2)
      // Fixed stray space that was in the original path literal ("dataout/ stbk/").
      rdd_stbk_res.saveAsTextFile(path = "dataout/stbk/")

      // Global ordering by the whole (k, v) pair: map each record to ((k, v), null)
      // so sortByKey sorts on the tuple. Tuple2 has an implicit lexicographic
      // Ordering (key first, then value), which also lets the RangePartitioner
      // sample and compare the tuple keys — answering the original TODO comments.
      val rdd_stbk_res2 = rdd_stbk.map(tp => (tp, null)).sortByKey(ascending = true, numPartitions = 2)
      rdd_stbk_res2.saveAsTextFile(path = "dataout/stbk2/")
    } finally {
      // Always release local Spark resources, even if a job above fails.
      sc.stop()
    }
  }
}
